In [2]:
import pandas as pd
import numpy as np
# Import the random forest package
from sklearn.ensemble import RandomForestClassifier
In [3]:
filename ="CrowdstormingDataJuly1st.csv"
Data = pd.read_csv(filename)
In [4]:
Data.ix[:10,:13]
Out[4]:
playerShort
player
club
leagueCountry
birthday
height
weight
position
games
victories
ties
defeats
goals
0
lucas-wilchez
Lucas Wilchez
Real Zaragoza
Spain
31.08.1983
177.0
72.0
Attacking Midfielder
1
0
0
1
0
1
john-utaka
John Utaka
Montpellier HSC
France
08.01.1982
179.0
82.0
Right Winger
1
0
0
1
0
2
abdon-prats
Abdón Prats
RCD Mallorca
Spain
17.12.1992
181.0
79.0
NaN
1
0
1
0
0
3
pablo-mari
Pablo Marí
RCD Mallorca
Spain
31.08.1993
191.0
87.0
Center Back
1
1
0
0
0
4
ruben-pena
Rubén Peña
Real Valladolid
Spain
18.07.1991
172.0
70.0
Right Midfielder
1
1
0
0
0
5
aaron-hughes
Aaron Hughes
Fulham FC
England
08.11.1979
182.0
71.0
Center Back
1
0
0
1
0
6
aleksandar-kolarov
Aleksandar Kolarov
Manchester City
England
10.11.1985
187.0
80.0
Left Fullback
1
1
0
0
0
7
alexander-tettey
Alexander Tettey
Norwich City
England
04.04.1986
180.0
68.0
Defensive Midfielder
1
0
0
1
0
8
anders-lindegaard
Anders Lindegaard
Manchester United
England
13.04.1984
193.0
80.0
Goalkeeper
1
0
1
0
0
9
andreas-beck
Andreas Beck
1899 Hoffenheim
Germany
13.03.1987
180.0
70.0
Right Fullback
1
1
0
0
0
10
antonio-rukavina
Antonio Rukavina
Real Valladolid
Spain
26.01.1984
177.0
74.0
Right Fullback
2
2
0
0
0
In [5]:
#Data.ix[:10,13:28]
In [6]:
# 1) Remove the players without rater 1 / 2 rating because we won't be
# able to train or test the values (this can be done as bonus later)
Data_hasImage = Data[pd.notnull(Data['photoID'])]
#Data_hasImage.ix[:10,13:28]
Got a lot of help from this script ! https://osf.io/w7tds/ It will be much simpler for us to train our random forest if each row corresponds to one game. This way, we won't have to give a different "weight" to each row according to the number of played games.
But let's start by doing the mean value of rater1 and rater 2, because if we keep them separated we might get some strange results. Indeed, what if for a player, rater1 = 0.0 and rater2 = 0.75 ? It would not make a lot of sense, or at least we would know our model is not viable !
In [16]:
Data_hasImage['mean_rater']=(Data_hasImage['rater1']+Data_hasImage['rater2'])/2
C:\Users\merin\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
Out[16]:
playerShort
player
club
leagueCountry
birthday
height
weight
position
games
victories
...
refNum
refCountry
Alpha_3
meanIAT
nIAT
seIAT
meanExp
nExp
seExp
mean_rater
0
lucas-wilchez
Lucas Wilchez
Real Zaragoza
Spain
31.08.1983
177.0
72.0
Attacking Midfielder
1
0
...
1
1
GRC
0.326391
712.0
0.000564
0.396000
750.0
0.002696
0.375
1
john-utaka
John Utaka
Montpellier HSC
France
08.01.1982
179.0
82.0
Right Winger
1
0
...
2
2
ZMB
0.203375
40.0
0.010875
-0.204082
49.0
0.061504
0.750
5
aaron-hughes
Aaron Hughes
Fulham FC
England
08.11.1979
182.0
71.0
Center Back
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
6
aleksandar-kolarov
Aleksandar Kolarov
Manchester City
England
10.11.1985
187.0
80.0
Left Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
7
alexander-tettey
Alexander Tettey
Norwich City
England
04.04.1986
180.0
68.0
Defensive Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
1.000
8
anders-lindegaard
Anders Lindegaard
Manchester United
England
13.04.1984
193.0
80.0
Goalkeeper
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.250
9
andreas-beck
Andreas Beck
1899 Hoffenheim
Germany
13.03.1987
180.0
70.0
Right Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
10
antonio-rukavina
Antonio Rukavina
Real Valladolid
Spain
26.01.1984
177.0
74.0
Right Fullback
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
11
ashkan-dejagah
Ashkan Dejagah
Fulham FC
England
05.07.1986
181.0
74.0
Left Winger
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.500
12
benedikt-hoewedes
Benedikt Höwedes
FC Schalke 04
Germany
29.02.1988
187.0
80.0
Center Back
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
13
chris-baird
Chris Baird
Fulham FC
England
25.02.1982
186.0
77.0
Defensive Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
14
chris-brunt
Chris Brunt
West Bromwich Albion
England
14.12.1984
185.0
74.0
NaN
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
15
daniel-schwaab
Daniel Schwaab
Bayer Leverkusen
Germany
23.08.1988
186.0
76.0
Right Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
16
dennis-aogo
Dennis Aogo
Hamburger SV
Germany
14.01.1987
184.0
85.0
Left Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.500
17
george-mccartney
George McCartney
West Ham United
England
29.04.1981
180.0
74.0
Left Fullback
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
18
gylfi-sigurdsson
Gylfi Sigurðsson
Tottenham Hotspur
England
08.09.1989
186.0
77.0
Attacking Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
19
ivan-obradovic
Ivan Obradović
Real Zaragoza
Spain
25.07.1988
181.0
74.0
Left Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
20
jan-moravek
Jan Morávek
FC Augsburg
Germany
01.11.1989
180.0
75.0
Attacking Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
21
jan-rosenthal
Jan Rosenthal
SC Freiburg
Germany
07.04.1986
186.0
76.0
Attacking Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
22
jonny-evans
Jonny Evans
Manchester United
England
02.01.1988
188.0
77.0
Center Back
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
23
kyriakos-papadopoulos
Kyriakos Papadopoulos
FC Schalke 04
Germany
23.02.1992
183.0
85.0
Center Back
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
24
marko-marin
Marko Marin
Chelsea FC
England
13.03.1989
170.0
63.0
Attacking Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
25
mats-hummels
Mats Hummels
Borussia Dortmund
Germany
16.12.1988
192.0
90.0
Center Back
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.250
26
mesut-oezil
Mesut Özil
Real Madrid
Spain
15.10.1988
183.0
76.0
Attacking Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
27
milorad-pekovic
Milorad Peković
SpVgg Greuther Fürth
Germany
05.08.1977
189.0
88.0
Defensive Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
28
nemanja-vidic
Nemanja Vidić
Manchester United
England
21.10.1981
188.0
82.0
Center Back
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
29
neven-subotic
Neven Subotić
Borussia Dortmund
Germany
10.12.1988
193.0
88.0
Center Back
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
30
patrick-ebert
Patrick Ebert
Real Valladolid
Spain
17.03.1987
172.0
68.0
Left Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
31
peter-gulacsi
Péter Gulácsi
Liverpool FC (R)
England
06.05.1990
190.0
75.0
Goalkeeper
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
32
steven-davis_2
Steven Davis
Southampton FC
England
01.01.1985
170.0
60.0
Defensive Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
145998
mohamed-sissoko
Mohamed Sissoko
Paris Saint-Germain
France
22.01.1985
191.0
81.0
Defensive Midfielder
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
1.000
145999
nani
Nani
Manchester United
England
17.11.1986
175.0
66.0
Right Winger
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.500
146000
nedum-onuoha
Nedum Onuoha
Queens Park Rangers
England
12.11.1986
183.0
78.0
Center Back
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.875
146001
nelson-valdez
Nelson Valdez
Valencia CF
Spain
28.11.1983
178.0
73.0
Center Forward
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.250
146002
nemanja-pejcinovic
Nemanja Pejčinović
OGC Nice
France
04.11.1987
185.0
84.0
Center Back
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.250
146003
nikos-karabelas
Nikos Karabelas
Levante UD
Spain
20.12.1984
180.0
72.0
Left Fullback
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
146004
obafemi-martins
Obafemi Martins
Levante UD
Spain
28.10.1984
176.0
67.0
Center Forward
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
1.000
146005
petr-cech
Petr Čech
Chelsea FC
England
20.05.1982
197.0
87.0
Goalkeeper
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.250
146006
radoslav-zabavnik
Radoslav Zabavník
1. FSV Mainz 05
Germany
16.09.1980
180.0
78.0
Left Fullback
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
146007
rafael-van-der-vaart
Rafael van der Vaart
Hamburger SV
Germany
11.02.1983
176.0
74.0
Attacking Midfielder
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
146008
ricardo-vaz-te
Ricardo Vaz Tê
West Ham United
England
01.10.1986
188.0
79.0
Left Winger
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.625
146009
richard-dunne
Richard Dunne
Aston Villa
England
21.09.1979
188.0
95.0
Center Back
3
3
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
146010
robin-van-persie
Robin van Persie
Manchester United
England
06.08.1983
183.0
71.0
Right Winger
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.250
146011
romelu-lukaku
Romelu Lukaku
West Bromwich Albion
England
13.05.1993
193.0
95.0
Center Forward
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.750
146012
sebastian-larsson
Sebastian Larsson
Sunderland AFC
England
06.06.1985
178.0
70.0
Right Midfielder
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
146013
sebastian-proedl
Sebastian Prödl
Werder Bremen
Germany
21.06.1987
194.0
85.0
Center Back
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
146014
shaun-maloney
Shaun Maloney
Wigan Athletic
England
24.01.1983
173.0
69.0
Left Midfielder
2
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
146015
shaun-wright-phillips
Shaun Wright-Phillips
Queens Park Rangers
England
25.10.1981
168.0
64.0
Right Winger
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
1.000
146016
shay-given
Shay Given
Aston Villa
England
20.04.1976
188.0
84.0
Goalkeeper
3
3
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
146017
shola-ameobi
Shola Ameobi
Newcastle United
England
12.10.1981
191.0
84.0
NaN
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
1.000
146018
slobodan-rajkovic
Slobodan Rajković
Hamburger SV
Germany
03.02.1989
191.0
88.0
Center Back
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
146019
steven-taylor
Steven Taylor
Newcastle United
England
23.01.1986
188.0
81.0
NaN
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
146020
timmy-simons
Timmy Simons
1. FC Nürnberg
Germany
11.12.1976
186.0
79.0
Defensive Midfielder
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
146021
titus-bramble
Titus Bramble
Sunderland AFC
England
21.07.1981
187.0
87.0
Center Back
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.875
146022
tom-huddlestone
Tom Huddlestone
Tottenham Hotspur
England
28.12.1986
188.0
80.0
Defensive Midfielder
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.500
146023
tomas-rosicky
Tomáš Rosický
Arsenal FC
England
04.10.1980
178.0
67.0
Attacking Midfielder
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
146024
winston-reid
Winston Reid
West Ham United
England
03.07.1988
190.0
87.0
Center Back
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.375
146025
xherdan-shaqiri
Xherdan Shaqiri
Bayern München
Germany
10.10.1991
169.0
72.0
Left Midfielder
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.250
146026
yassine-el-ghanassi
Yassine El Ghanassi
West Bromwich Albion
England
12.07.1990
173.0
NaN
Left Winger
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.500
146027
zdenk-pospch
Zdeněk Pospěch
1. FSV Mainz 05
Germany
14.12.1978
174.0
72.0
Right Fullback
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
124621 rows × 29 columns
Let's now disaggregate the games:
In [37]:
game_counter = 0
game_total_number = sum(Data_hasImage['games'])
# Raw table that we'll have to convert to a dataframe later
output = [0 for i in range(game_total_number)]
# We now iterate each row of our dataframe, which may contains more that one game
for i, row in Data_hasImage.iterrows():
# Number of games in the current row
row_game_number = row['games']
# Number of cumulated cards for the games in the current row
yellowCards = row['yellowCards']
yellowReds = row['yellowReds']
redCards = row['redCards']
# We want to seperate each of these games
for j in range (row_game_number):
game = row
game['yellowCards'] = 0
game['yellowReds'] = 0
game['redCards'] = 0
# Basically, we distribute the cards we have on separate games.
# ie: if we have 2 yellowCard and 1 redCard for a total of 4 games,
# the first two games will be assigned a yellowCard,
# the third game will be assigned a redCard,
# and the last game won't have any card assigned, because there is no card left.
if yellowCards > 0:
game['yellowCards'] = 1
yellowCards = yellowCards - 1
elif yellowReds > 0:
game['yellowReds'] = 1
yellowReds = yellowReds - 1
elif redCards > 0:
game['redCards'] = 1
redCards = redCards - 1
# Convert from pandas Series to prevent overwriting previous values of the output
gamelist=list(game)
# Add the new game to the output
output[game_counter] = gamelist
game_counter = game_counter + 1
# Here is the output dataframe
Data_OneGamePerRow = pd.DataFrame(output, columns=list(Data_hasImage.columns))
Data_OneGamePerRow
Out[37]:
playerShort
player
club
leagueCountry
birthday
height
weight
position
games
victories
...
refNum
refCountry
Alpha_3
meanIAT
nIAT
seIAT
meanExp
nExp
seExp
mean_rater
0
lucas-wilchez
Lucas Wilchez
Real Zaragoza
Spain
31.08.1983
177.0
72.0
Attacking Midfielder
1
0
...
1
1
GRC
0.326391
712.0
0.000564
0.396000
750.0
0.002696
0.375
1
john-utaka
John Utaka
Montpellier HSC
France
08.01.1982
179.0
82.0
Right Winger
1
0
...
2
2
ZMB
0.203375
40.0
0.010875
-0.204082
49.0
0.061504
0.750
2
aaron-hughes
Aaron Hughes
Fulham FC
England
08.11.1979
182.0
71.0
Center Back
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
3
aleksandar-kolarov
Aleksandar Kolarov
Manchester City
England
10.11.1985
187.0
80.0
Left Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
4
alexander-tettey
Alexander Tettey
Norwich City
England
04.04.1986
180.0
68.0
Defensive Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
1.000
5
anders-lindegaard
Anders Lindegaard
Manchester United
England
13.04.1984
193.0
80.0
Goalkeeper
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.250
6
andreas-beck
Andreas Beck
1899 Hoffenheim
Germany
13.03.1987
180.0
70.0
Right Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
7
antonio-rukavina
Antonio Rukavina
Real Valladolid
Spain
26.01.1984
177.0
74.0
Right Fullback
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
8
antonio-rukavina
Antonio Rukavina
Real Valladolid
Spain
26.01.1984
177.0
74.0
Right Fullback
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
9
ashkan-dejagah
Ashkan Dejagah
Fulham FC
England
05.07.1986
181.0
74.0
Left Winger
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.500
10
benedikt-hoewedes
Benedikt Höwedes
FC Schalke 04
Germany
29.02.1988
187.0
80.0
Center Back
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
11
chris-baird
Chris Baird
Fulham FC
England
25.02.1982
186.0
77.0
Defensive Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
12
chris-brunt
Chris Brunt
West Bromwich Albion
England
14.12.1984
185.0
74.0
NaN
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
13
daniel-schwaab
Daniel Schwaab
Bayer Leverkusen
Germany
23.08.1988
186.0
76.0
Right Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
14
dennis-aogo
Dennis Aogo
Hamburger SV
Germany
14.01.1987
184.0
85.0
Left Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.500
15
george-mccartney
George McCartney
West Ham United
England
29.04.1981
180.0
74.0
Left Fullback
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
16
gylfi-sigurdsson
Gylfi Sigurðsson
Tottenham Hotspur
England
08.09.1989
186.0
77.0
Attacking Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
17
ivan-obradovic
Ivan Obradović
Real Zaragoza
Spain
25.07.1988
181.0
74.0
Left Fullback
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
18
jan-moravek
Jan Morávek
FC Augsburg
Germany
01.11.1989
180.0
75.0
Attacking Midfielder
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
19
jan-rosenthal
Jan Rosenthal
SC Freiburg
Germany
07.04.1986
186.0
76.0
Attacking Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
20
jonny-evans
Jonny Evans
Manchester United
England
02.01.1988
188.0
77.0
Center Back
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
21
kyriakos-papadopoulos
Kyriakos Papadopoulos
FC Schalke 04
Germany
23.02.1992
183.0
85.0
Center Back
1
0
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
22
marko-marin
Marko Marin
Chelsea FC
England
13.03.1989
170.0
63.0
Attacking Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
23
mats-hummels
Mats Hummels
Borussia Dortmund
Germany
16.12.1988
192.0
90.0
Center Back
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.250
24
mesut-oezil
Mesut Özil
Real Madrid
Spain
15.10.1988
183.0
76.0
Attacking Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
25
milorad-pekovic
Milorad Peković
SpVgg Greuther Fürth
Germany
05.08.1977
189.0
88.0
Defensive Midfielder
1
1
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
26
nemanja-vidic
Nemanja Vidić
Manchester United
England
21.10.1981
188.0
82.0
Center Back
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
27
nemanja-vidic
Nemanja Vidić
Manchester United
England
21.10.1981
188.0
82.0
Center Back
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.000
28
neven-subotic
Neven Subotić
Borussia Dortmund
Germany
10.12.1988
193.0
88.0
Center Back
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
29
neven-subotic
Neven Subotić
Borussia Dortmund
Germany
10.12.1988
193.0
88.0
Center Back
2
2
...
4
4
LUX
0.325185
127.0
0.003297
0.538462
130.0
0.013752
0.125
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
373037
nikos-karabelas
Nikos Karabelas
Levante UD
Spain
20.12.1984
180.0
72.0
Left Fullback
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373038
obafemi-martins
Obafemi Martins
Levante UD
Spain
28.10.1984
176.0
67.0
Center Forward
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
1.000
373039
petr-cech
Petr Čech
Chelsea FC
England
20.05.1982
197.0
87.0
Goalkeeper
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.250
373040
radoslav-zabavnik
Radoslav Zabavník
1. FSV Mainz 05
Germany
16.09.1980
180.0
78.0
Left Fullback
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
373041
rafael-van-der-vaart
Rafael van der Vaart
Hamburger SV
Germany
11.02.1983
176.0
74.0
Attacking Midfielder
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
373042
ricardo-vaz-te
Ricardo Vaz Tê
West Ham United
England
01.10.1986
188.0
79.0
Left Winger
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.625
373043
richard-dunne
Richard Dunne
Aston Villa
England
21.09.1979
188.0
95.0
Center Back
3
3
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373044
richard-dunne
Richard Dunne
Aston Villa
England
21.09.1979
188.0
95.0
Center Back
3
3
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373045
richard-dunne
Richard Dunne
Aston Villa
England
21.09.1979
188.0
95.0
Center Back
3
3
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373046
robin-van-persie
Robin van Persie
Manchester United
England
06.08.1983
183.0
71.0
Right Winger
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.250
373047
romelu-lukaku
Romelu Lukaku
West Bromwich Albion
England
13.05.1993
193.0
95.0
Center Forward
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.750
373048
sebastian-larsson
Sebastian Larsson
Sunderland AFC
England
06.06.1985
178.0
70.0
Right Midfielder
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373049
sebastian-proedl
Sebastian Prödl
Werder Bremen
Germany
21.06.1987
194.0
85.0
Center Back
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
373050
shaun-maloney
Shaun Maloney
Wigan Athletic
England
24.01.1983
173.0
69.0
Left Midfielder
2
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
373051
shaun-maloney
Shaun Maloney
Wigan Athletic
England
24.01.1983
173.0
69.0
Left Midfielder
2
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
373052
shaun-wright-phillips
Shaun Wright-Phillips
Queens Park Rangers
England
25.10.1981
168.0
64.0
Right Winger
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
1.000
373053
shay-given
Shay Given
Aston Villa
England
20.04.1976
188.0
84.0
Goalkeeper
3
3
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373054
shay-given
Shay Given
Aston Villa
England
20.04.1976
188.0
84.0
Goalkeeper
3
3
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373055
shay-given
Shay Given
Aston Villa
England
20.04.1976
188.0
84.0
Goalkeeper
3
3
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373056
shola-ameobi
Shola Ameobi
Newcastle United
England
12.10.1981
191.0
84.0
NaN
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
1.000
373057
slobodan-rajkovic
Slobodan Rajković
Hamburger SV
Germany
03.02.1989
191.0
88.0
Center Back
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373058
steven-taylor
Steven Taylor
Newcastle United
England
23.01.1986
188.0
81.0
NaN
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
373059
timmy-simons
Timmy Simons
1. FC Nürnberg
Germany
11.12.1976
186.0
79.0
Defensive Midfielder
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373060
titus-bramble
Titus Bramble
Sunderland AFC
England
21.07.1981
187.0
87.0
Center Back
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.875
373061
tom-huddlestone
Tom Huddlestone
Tottenham Hotspur
England
28.12.1986
188.0
80.0
Defensive Midfielder
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.500
373062
tomas-rosicky
Tomáš Rosický
Arsenal FC
England
04.10.1980
178.0
67.0
Attacking Midfielder
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.000
373063
winston-reid
Winston Reid
West Ham United
England
03.07.1988
190.0
87.0
Center Back
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.375
373064
xherdan-shaqiri
Xherdan Shaqiri
Bayern München
Germany
10.10.1991
169.0
72.0
Left Midfielder
1
1
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.250
373065
yassine-el-ghanassi
Yassine El Ghanassi
West Bromwich Albion
England
12.07.1990
173.0
NaN
Left Winger
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.500
373066
zdenk-pospch
Zdeněk Pospěch
1. FSV Mainz 05
Germany
14.12.1978
174.0
72.0
Right Fullback
1
0
...
3147
21
HUN
0.376127
574.0
0.000714
0.498350
606.0
0.002968
0.125
373067 rows × 29 columns
In [44]:
# Removing columns that we do not need
Data_Simple1 = Data_OneGamePerRow[['playerShort', 'yellowCards', 'yellowReds', 'redCards',
'refNum', 'refCountry', 'mean_rater']]
# Take a random 80% sample of the Data for the Training Sample
Data_Training = Data_Simple1.sample(frac=0.8)
# Take a random 20% sample of the Data for the Testing Sample
Data_Testing = Data_Simple1.loc[~Data_Simple1.index.isin(Data_Training.index)]
In [45]:
Data_Simple1
Out[45]:
playerShort
yellowCards
yellowReds
redCards
refNum
refCountry
mean_rater
0
lucas-wilchez
0
0
0
1
1
0.375
1
john-utaka
1
0
0
2
2
0.750
2
aaron-hughes
0
0
0
4
4
0.125
3
aleksandar-kolarov
0
0
0
4
4
0.125
4
alexander-tettey
0
0
0
4
4
1.000
5
anders-lindegaard
0
0
0
4
4
0.250
6
andreas-beck
0
0
0
4
4
0.000
7
antonio-rukavina
1
0
0
4
4
0.000
8
antonio-rukavina
0
0
0
4
4
0.000
9
ashkan-dejagah
0
0
0
4
4
0.500
10
benedikt-hoewedes
0
0
0
4
4
0.000
11
chris-baird
0
0
0
4
4
0.000
12
chris-brunt
0
0
0
4
4
0.125
13
daniel-schwaab
0
0
0
4
4
0.000
14
dennis-aogo
0
0
0
4
4
0.500
15
george-mccartney
0
0
0
4
4
0.000
16
gylfi-sigurdsson
0
0
0
4
4
0.000
17
ivan-obradovic
1
0
0
4
4
0.125
18
jan-moravek
0
0
0
4
4
0.125
19
jan-rosenthal
0
0
0
4
4
0.000
20
jonny-evans
0
0
0
4
4
0.000
21
kyriakos-papadopoulos
0
0
0
4
4
0.000
22
marko-marin
0
0
0
4
4
0.125
23
mats-hummels
0
0
0
4
4
0.250
24
mesut-oezil
0
0
0
4
4
0.125
25
milorad-pekovic
0
0
0
4
4
0.125
26
nemanja-vidic
0
0
0
4
4
0.000
27
nemanja-vidic
0
0
0
4
4
0.000
28
neven-subotic
0
0
0
4
4
0.125
29
neven-subotic
0
0
0
4
4
0.125
...
...
...
...
...
...
...
...
373037
nikos-karabelas
0
0
0
3147
21
0.000
373038
obafemi-martins
0
0
0
3147
21
1.000
373039
petr-cech
0
0
0
3147
21
0.250
373040
radoslav-zabavnik
0
0
0
3147
21
0.125
373041
rafael-van-der-vaart
0
0
0
3147
21
0.125
373042
ricardo-vaz-te
0
0
0
3147
21
0.625
373043
richard-dunne
0
0
0
3147
21
0.000
373044
richard-dunne
0
0
0
3147
21
0.000
373045
richard-dunne
0
0
0
3147
21
0.000
373046
robin-van-persie
0
0
0
3147
21
0.250
373047
romelu-lukaku
0
0
0
3147
21
0.750
373048
sebastian-larsson
0
0
0
3147
21
0.000
373049
sebastian-proedl
0
0
0
3147
21
0.125
373050
shaun-maloney
0
0
0
3147
21
0.125
373051
shaun-maloney
0
0
0
3147
21
0.125
373052
shaun-wright-phillips
0
0
0
3147
21
1.000
373053
shay-given
0
0
0
3147
21
0.000
373054
shay-given
0
0
0
3147
21
0.000
373055
shay-given
0
0
0
3147
21
0.000
373056
shola-ameobi
1
0
0
3147
21
1.000
373057
slobodan-rajkovic
0
0
0
3147
21
0.000
373058
steven-taylor
0
0
0
3147
21
0.125
373059
timmy-simons
0
0
0
3147
21
0.000
373060
titus-bramble
0
0
0
3147
21
0.875
373061
tom-huddlestone
0
0
0
3147
21
0.500
373062
tomas-rosicky
0
0
0
3147
21
0.000
373063
winston-reid
0
0
0
3147
21
0.375
373064
xherdan-shaqiri
0
0
0
3147
21
0.250
373065
yassine-el-ghanassi
0
0
0
3147
21
0.500
373066
zdenk-pospch
0
0
0
3147
21
0.125
373067 rows × 7 columns
In [38]:
# 1) Remove the players without rater 1 / 2 rating because we won't be
# able to train or test the values (this can be done as bonus later)
Data_hasImage = Data[pd.notnull(Data['photoID'])]
#Data_hasImage.ix[:10,13:28]
In [57]:
#for i in Data.iterrows():
# if Data.at[i, 'games'] > 1:
# df = Data[i]
In [65]:
# Testing - divide # cards (all types) by # games column
Data_hasImage['fractionYellow'] = Data_hasImage['yellowCards']/Data_hasImage['games']
Data_hasImage['fractionYellowRed'] = Data_hasImage['yellowReds']/Data_hasImage['games']
Data_hasImage['fractionRed'] = Data_hasImage['redCards']/Data_hasImage['games']
# Get the average of the raters
Data_hasImage['raterAvg'] = (Data_hasImage['rater1']+Data_hasImage['rater2'])/2
#Data_hasImage.head()
/Users/paramoed/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
from ipykernel import kernelapp as app
/Users/paramoed/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
/Users/paramoed/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/paramoed/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
In [104]:
# Removing columns that we do not need
Data_Simple1 = Data_hasImage[['games', 'fractionYellow', 'fractionYellowRed', 'fractionRed',
'refNum', 'refCountry', 'raterAvg']]
#cols = ['playerShort', 'games', 'fractionYellow', 'fractionYellowRed', 'fractionRed',
cols = ['games', 'fractionYellow', 'fractionYellowRed', 'fractionRed', 'refNum', 'refCountry']
colsRes = ['raterAvg']
# Take a random 80% sample of the Data for the Training Sample
Data_Training = Data_Simple1.sample(frac=0.8)
# Need to split this into the data and the results columns
# http://stackoverflow.com/questions/34246336/python-randomforest-unknown-label-error
Input_Data_Training = Data_Training.drop(colsRes, axis=1)
#Results_Data_Training = list(Data_Training.raterAvg.values)
Results_Data_Training = Data_Training[colsRes]
In [90]:
# Take a random 20% sample of the Data for the Testing Sample
#Data_Testing = Data_Simple1.loc[~Data_Simple1.index.isin(Data_Training.index)]
# Need to split this into the data and the results columns
# http://stackoverflow.com/questions/34246336/python-randomforest-unknown-label-error
#Input_Data_Testing = Data_Testing.drop(colsRes, axis=1)
#Results_Data_Testing = list(Data_Testing.raterAvg.values)
In [112]:
# Need to make arrays
# http://www.analyticbridge.com/profiles/blogs/random-forest-in-python
trainArr = Input_Data_Training.as_matrix(cols) #training array
trainRes = Results_Data_Training.as_matrix(colsRes) #training results
#trainRes = np.asarray(Data_Training['raterAvg'], dtype="|S6")
trainRes
Out[112]:
array([[ 0. ],
[ 0. ],
[ 0.125],
...,
[ 0. ],
[ 0.125],
[ 0.75 ]])
In [110]:
#Initialize
forest = RandomForestClassifier(n_estimators = 100)
# Fit the training data and create the decision trees
forest.fit(trainArr,trainRes)
# Take the same decision trees and run it on the test data
#testArr = test.as_matrix(cols)
#results = rf.predict(testArr)
#test['predictions'] = results
#test.head()
Out[110]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
max_depth=None, max_features='auto', max_leaf_nodes=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
oob_score=False, random_state=None, verbose=0,
warm_start=False)
Content source: Merinorus/adaisawesome
Similar notebooks: